TeaTime4schools: joint analysis - bacteria

Roey Angel 2021-05-21

roey.angel@bc.cas.cz

Taxonomical analysis

This analysis explores the taxonomical distribution patters in the different samples, based on the DADA2-produced sequences. Large parts of this script are based on this protocol and the accompanying publication by Callahan and colleagues (2016).

Setting general parameters:

set.seed(1000)
min_lib_size <- 5000
metadata_path <- "./"
data_path <- "./DADA2_pseudo/"
Metadata_table <- "TeaTime_joint_Bacteria_metadata.csv"
Seq_table <- "DADA2.seqtab_nochim.tsv"
Tax_table <- "DADA2.taxa_silva.tsv"
Proj_name <- "TeaTime4Schools"

Reading in raw data

Read abundance table, taxonomic classification and metadata into a phyloseq object. Also remove sequences detected as contaminants in 03_Decontamination.html.

# read OTU mat from data file
Raw_data <- read_tsv(paste0(data_path, Seq_table), 
                        trim_ws = TRUE)
contaminant_seqs <- read_csv(paste0(data_path, "decontam_contaminants.csv"), 
                        trim_ws = TRUE,
                        col_names = FALSE)

Raw_data %<>% # remove contaminant OTUs. 
  # .[, -grep("CTRL", colnames(.))] %>% # remove ext. cont. 
  .[!(Raw_data$`#OTU` %in% contaminant_seqs$X1), ] 

Raw_data[, 2:ncol(Raw_data)] %>% 
  t() %>% 
  as.data.frame() -> abundance_mat # convert to abundance matrix
colnames(abundance_mat) <- pull(Raw_data, "#OTU") # add sequence names

# Read metadata file
read_csv(paste0(metadata_path, Metadata_table),
         trim_ws = TRUE) %>%
  mutate_at(
    c(
      "Workshop",
      "Season",
      "Run",
      "Type",
      "Sample type",
      "Field",
      "Replicate",
      "Control",
      "Gene"
    ),
    ~factor(.)
  ) %>% 
  mutate_at(c("Extr. Date", "PCR products_16S_send for seq"), ~as.Date(., "%d.%m.%Y")) ->
  Metadata
Metadata$Season %<>% fct_relevel("Winter", "Spring", "Summer", "Autumn")
Metadata$Read1_file <- str_replace(Metadata$Read1_file, "(.*)_L001_R1_001.fastq.gz|\\.1\\.fastq.gz", "\\1")
Metadata <- Metadata[Metadata$Read1_file %in% rownames(abundance_mat), ] # remove metadata rows if the samples did not go through qual processing

# Order abundance_mat samples according to the metadata
sample_order <- match(rownames(abundance_mat), Metadata$Read1_file)
abundance_mat %<>% 
  rownames_to_column('sample_name') %>% 
  arrange(., sample_order) %>% 
  column_to_rownames('sample_name') # needed for phyloseq

Metadata$Library.size <- rowSums(abundance_mat)
Metadata <- data.frame(row.names = Metadata$Read1_file, Metadata)

# read taxonomy from data file
Raw_tax_data <- read_tsv(paste0(data_path, Tax_table), 
                        trim_ws = TRUE, col_names = TRUE)
Raw_tax_data %<>%
  .[!(Raw_tax_data$`Seq#` %in% contaminant_seqs$X1),] %>% # remove contaminant OTUs.
  mutate_all(funs(replace(., is.na(.), "Unclassified"))) # I think mutaute_all is unnecessary here because replace(., is.na(.), "Unclassified") alone should work

Raw_tax_data %>%
  select(.,
         Kingdom = V8,
         Phylum = V9,
         Class = V10,
         Order = V11,
         Family = V12,
         Genus = V13) %>%
  cbind(Name = colnames(abundance_mat),. ) ->
  Taxonomy.bs

Raw_tax_data %>%
  select(.,
         Kingdom,
         Phylum,
         Class,
         Order,
         Family,
         Genus)  %>% 
  # map_dfr(~str_remove_all(., "^.__")) %>% # remove "k__" prefixes 
  add_column(Seq = colnames(abundance_mat)) %>% 
  column_to_rownames("Seq") %>% 
  set_colnames(c("Domain", "Phylum", "Class", "Order", "Family", "Genus")) %>% 
  as.matrix() -> # because tax_table() needs a matrix for retaining row.names
  Taxonomy

# generate phyloseq object
Ps_obj <- phyloseq(otu_table(abundance_mat, taxa_are_rows = FALSE),
                        tax_table(Taxonomy),
                        sample_data(Metadata)
                        )
Ps_obj <- filter_taxa(Ps_obj, function(x) sum(x) > 0, TRUE) # remove 0 abundance taxa
Ps_obj <- subset_samples(Ps_obj, sample_sums(Ps_obj) > 0) # remove 0 abundance samples
# Remove mock and control samples
Ps_obj <- subset_samples(Ps_obj, Type != "Control" & Type != "Mock")

# Create a grouping variable for merging
sample_data(Ps_obj) %<>% 
  as(., "data.frame") %>% 
  # get_variable(., c("Sample.type", "Field", "Season", "Replicate")) %>% 
  unite(., "Description", c("Sample.type", "Field", "Season", "Replicate"), remove = FALSE)
sample_data(Ps_obj)$Description %<>% as_factor(.)

# merged_Ps_obj <- merge_samples(Ps_obj, "Description")
# merged_SD <- merge_samples(sample_data(Ps_obj), "Description")
Ps_obj_merged <- MergeSamples(Ps_obj, grouping_name = "Description")

Exploring Ps_obj dataset features

Taxa-based filtering

First, let’s look at the taxonomic distribution

table(tax_table(Ps_obj_merged)[, "Domain"], exclude = NULL)
## 
##      Archaea     Bacteria    Eukaryota Unclassified 
##           46        15652           45           71
table(tax_table(Ps_obj_merged)[, "Class"], exclude = NULL)
## 
##                   0319-7L14                        ABY1              Acidimicrobiia 
##                          32                           2                         469 
##              Acidobacteriia              Actinobacteria                    AKAU4049 
##                         111                         798                           2 
##         Alphaproteobacteria                Anaerolineae               Armatimonadia 
##                        2110                         313                           7 
##                    AT-s3-28                    Babeliae                     Bacilli 
##                           3                          23                         207 
##                 Bacteroidia    BD2-11_terrestrial_group                      BD7-11 
##                        2173                          43                           8 
## Blastocatellia_(Subgroup_4)                Calditrichia                  Chlamydiae 
##                         153                           2                          70 
##                Chloroflexia            Chthonomonadetes                  Clostridia 
##                         182                           8                          47 
##             Dehalococcoidia                  Deinococci         Deltaproteobacteria 
##                          44                           3                        1763 
##               Elusimicrobia              Entotheonellia               Fibrobacteria 
##                           3                          36                          37 
##              Fimbriimonadia               Fusobacteriia         Gammaproteobacteria 
##                          41                           1                        1782 
##            Gemmatimonadetes                 Gitt-GS-136             Gracilibacteria 
##                         340                          26                           8 
##                  Holophagae             Hydrogenedentia              Ignavibacteria 
##                          45                           1                          44 
##                JG30-KF-CM66                      KD4-96             Ktedonobacteria 
##                          13                          69                           4 
##             Latescibacteria                 Leptospirae                Limnochordia 
##                           9                           4                           2 
##                 Lineage_IIa                 Lineage_IIb               Longimicrobia 
##                          17                          28                          61 
##                   MB-A2-108             Melainabacteria              Microgenomatia 
##                          85                          35                           5 
##                  Mollicutes                        NC10               Negativicutes 
##                           8                          36                           4 
##             Nitriliruptoria             Nitrososphaeria                  Nitrospira 
##                           6                          33                          16 
##                       OLB14                       OM190                 Omnitrophia 
##                          11                         114                           5 
##            Oxyphotobacteria                      P2-11E               Parcubacteria 
##                          88                           4                          21 
##               Phycisphaerae                Pla3_lineage                Pla4_lineage 
##                         270                          20                          40 
##            Planctomycetacia                Rhodothermia               Rubrobacteria 
##                         870                           1                           5 
##     S0134_terrestrial_group             Saccharimonadia           Sericytochromatia 
##                          61                          38                          15 
##                Spirochaetia                 Subgroup_11                 Subgroup_15 
##                           3                           8                          11 
##                 Subgroup_17                 Subgroup_18                 Subgroup_20 
##                          43                           7                           1 
##                 Subgroup_22                 Subgroup_25                  Subgroup_5 
##                          88                          18                          21 
##                  Subgroup_6                  Subgroup_9                 Synergistia 
##                         442                           3                           1 
##         Thermoanaerobaculia             Thermoleophilia              Thermoplasmata 
##                          96                         599                          11 
##                        TK10                Unclassified                   vadinHA49 
##                          69                         604                          57 
##            Verrucomicrobiae               Woesearchaeia                        WWE3 
##                         793                           2                           2
# table(tax_table(Ps_obj_merged)[, "Family"], exclude = NULL)

Now let’s remove some taxa, which are obvious artefacts or those which aren’t bacteria or archaea

domains2remove <- c("", "Eukaryota", "Unclassified")
orders2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")

Ps_obj_filt <- subset_taxa(Ps_obj_merged, !is.na(Phylum) &
                        !Domain %in% domains2remove &
                      !Order %in% orders2remove &
                      !Family %in% families2remove)


Taxonomy.bs %<>% 
  filter(Taxonomy.bs$Name %in% row.names(Ps_obj_filt@tax_table))
sample_data(Ps_obj_filt)$Lib.size <- rowSums(otu_table(Ps_obj_filt))

Now let’s explore the prevalence of different taxa in the database. Prevalence is the number of samples in which a taxa appears at least once. So “Mean prevalence” refers to in how many samples does a sequence belonging to the phylum appears on average, and “Sum prevalence” is the sum of all samples where any sequence from the phylum appears,

prevdf <- apply(X = otu_table(Ps_obj_filt),
                 MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
                      TotalAbundance = taxa_sums(Ps_obj_filt),
                      tax_table(Ps_obj_filt))

prevdf %>%
  group_by(Phylum) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_phylum_summary

Prevalence_phylum_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Phylum Mean prevalence Sum prevalence
Acidobacteria 12.9 13700
Actinobacteria 14.2 28380
Armatimonadetes 6.3 534
Bacteroidetes 13.1 29019
BRC1 6.8 75
Calditrichaeota 4.5 9
Chlamydiae 2.3 161
Chloroflexi 9.6 7138
Cyanobacteria 4.7 344
Deinococcus-Thermus 1.0 3
Dependentiae 2.2 51
Elusimicrobia 2.9 139
Entotheonellaeota 15.6 562
Euryarchaeota 4.5 49
FBP 8.4 126
FCPU426 6.0 6
Fibrobacteres 7.4 274
Firmicutes 5.9 1523
Fusobacteria 1.0 1
GAL15 1.0 1
Gemmatimonadetes 8.1 4128
Hydrogenedentes 19.0 19
Latescibacteria 9.6 799
Nanoarchaeaeota 1.0 2
Nitrospirae 15.8 252
Omnitrophicaeota 1.2 6
Patescibacteria 2.6 206
Planctomycetes 7.7 10630
Proteobacteria 12.9 72598
Rokubacteria 11.9 429
Spirochaetes 2.1 15
Synergistetes 1.0 1
Tenericutes 1.2 10
Thaumarchaeota 27.8 919
Unclassified 5.9 1602
Verrucomicrobia 12.6 9973
WPS-2 3.4 17
WS2 5.0 5
prevdf %>%
  group_by(Order) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_order_summary

Prevalence_order_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Order Mean prevalence Sum prevalence
11-24 9.9 597
211ds20 4.3 13
Absconditabacteriales\_(SR1) 4.3 13
Acetobacterales 12.2 854
Acidiferrobacterales 1.3 8
Acidobacteriales 6.2 112
Actinomarinales 18.0 395
Actinomycetales 1.0 1
Aeromonadales 41.0 41
Anaerolineales 6.1 245
Ardenticatenales 4.7 155
Armatimonadales 1.7 12
Azospirillales 22.0 922
Babeliales 2.2 51
Bacillales 6.3 1198
Bacteroidales 1.0 6
Bacteroidetes\_VC2.1\_Bac22 6.5 85
Bdellovibrionales 6.8 2518
Betaproteobacteriales 18.4 12335
Blastocatellales 13.9 682
C0119 9.2 37
Caedibacterales 5.8 35
Caldilineales 6.7 201
Calditrichales 4.5 9
Candidatus\_Amesbacteria 1.0 1
Candidatus\_Azambacteria 1.0 2
Candidatus\_Campbellbacteria 1.0 1
Candidatus\_Collierbacteria 1.0 2
Candidatus\_Jorgensenbacteria 1.0 2
Candidatus\_Liptonbacteria 1.0 1
Candidatus\_Magasanikbacteria 1.0 1
Candidatus\_Nomurabacteria 2.7 8
Candidatus\_Peribacteria 1.0 3
Candidatus\_Woesebacteria 1.0 1
Candidatus\_Yanofskybacteria 1.0 1
Candidatus\_Yonathbacteria 1.0 2
Caulobacterales 21.8 3442
CCD24 27.7 360
CCM11a 3.6 36
CCM19a 13.0 13
Cellvibrionales 15.5 557
Chitinophagales 12.1 9920
Chlamydiales 2.3 161
Chloroflexales 6.8 688
Chthoniobacterales 10.9 2797
Chthonomonadales 5.6 45
Clostridiales 3.4 160
Corynebacteriales 15.4 1143
Coxiellales 1.0 1
Cytophagales 14.4 6477
Deinococcales 1.0 3
Desulfarculales 8.1 331
Desulfuromonadales 9.8 39
Diplorickettsiales 2.1 196
Dongiales 38.2 687
DS-100 10.5 116
Elsterales 8.3 200
EMP-G18 1.0 2
Enterobacteriales 19.4 775
Entotheonellales 15.6 562
EPR3968-O8a-Bc78 1.0 1
Euzebyales 9.5 57
EV818SWSAP88 1.0 1
Fibrobacterales 7.4 274
Fimbriimonadales 8.1 334
Flavobacteriales 12.9 5003
Frankiales 18.3 1337
Fusobacteriales 1.0 1
Gaiellales 13.6 3144
Gammaproteobacteria\_Incertae\_Sedis 14.0 896
Gemmatales 6.1 2924
Gemmatimonadales 8.6 2922
Glycomycetales 22.0 44
Holosporales 1.5 6
Hydrogenedentiales 19.0 19
Ignavibacteriales 1.0 2
IMCC26256 7.0 640
Isosphaerales 9.2 333
Kallotenuales 3.8 87
KF-JG30-C25 10.0 10
KI89A\_clade 22.0 44
Kineosporiales 26.1 365
Kryptoniales 5.8 35
Lactobacillales 7.0 112
Latescibacterales 8.7 78
Legionellales 2.0 123
Leptospirales 2.2 9
Limnochordales 1.0 2
Longimicrobiales 4.2 257
MBNT15 8.9 89
Methylacidiphilales 7.9 174
Methylococcales 16.0 32
Micavibrionales 4.3 608
Micrococcales 24.2 4481
Micromonosporales 21.1 2614
Micropepsales 20.4 306
Microtrichales 9.8 2840
mle1-8 16.8 67
MVP-88 1.0 3
Mycoplasmatales 2.5 5
Myxococcales 10.2 11013
NB1-j 9.9 582
Nitrosococcales 8.3 25
Nitrososphaerales 27.8 919
Nitrospirales 15.8 252
NKB5 1.0 1
Nostocales 4.5 77
Obscuribacterales 5.4 86
Oceanospirillales 15.7 470
Oligoflexales 4.1 565
Omnitrophales 1.2 6
OPB56 7.8 163
Opitutales 16.5 2420
Oxyphotobacteria\_Incertae\_Sedis 1.0 3
Paracaedibacterales 6.4 115
Parvibaculales 2.0 4
PB19 14.5 58
Pedosphaerales 10.1 1713
PeM15 1.0 2
Phormidesmiales 1.0 3
Phycisphaerales 5.6 611
Pirellulales 10.0 2517
Piscirickettsiales 1.0 1
Pla1\_lineage 1.7 5
Planctomycetales 8.9 841
PLTA13 13.2 661
Propionibacteriales 17.1 2348
Pseudomonadales 17.9 1751
Pseudonocardiales 17.7 1202
Puniceispirillales 8.2 66
Pyrinomonadales 20.2 666
R7C24 9.5 971
RBG-13-54-9 8.6 138
RCP2-54 15.0 270
Reyranellales 18.8 675
Rhizobiales 19.6 13223
Rhodobacterales 10.4 510
Rhodospirillales 7.6 426
Rhodothermales 2.0 2
Rhodovibrionales 1.0 1
Rickettsiales 4.1 509
Rokubacteriales 11.9 429
Rubrobacterales 40.8 204
S-70 1.0 1
S-BQ2-57\_soil\_group 1.0 2
S085 12.8 513
Saccharimonadales 3.8 145
Salinisphaerales 7.5 383
SAR202\_clade 8.5 17
SAR324\_clade(Marine\_group\_B) 4.7 14
SBR1031 9.4 1806
sediment-surface35 1.0 1
Selenomonadales 11.8 47
SJA-28 7.6 99
SM1A07 10.5 42
Sneathiellales 11.5 69
Solibacterales 10.4 898
Solirubrobacterales 11.7 4118
Sphingobacteriales 14.8 6945
Sphingomonadales 18.5 7650
Spirochaetales 2.0 6
Steroidobacterales 13.9 1471
Streptomycetales 18.2 911
Streptosporangiales 7.6 434
Subgroup\_13 1.0 1
Subgroup\_2 5.5 33
Subgroup\_7 12.3 555
Synergistales 1.0 1
Syntrophobacterales 1.6 11
Tepidisphaerales 9.8 1379
Thermoanaerobaculales 8.1 780
Thermomicrobiales 10.4 595
Tistrellales 13.1 692
UA11 4.1 53
Unclassified 9.3 21034
Unknown\_Order 17.8 446
Vampirovibrionales 6.0 114
Verrucomicrobiales 16.2 2741
Xanthomonadales 14.3 3655

Based on that I’ll remove all phyla with a prevalence of under 10

Prevalence_phylum_summary %>% 
  filter(`Sum prevalence` < 10) %>% 
  select(Phylum) %>% 
  map(as.character) %>% 
  unlist() ->
  filterPhyla

Ps_obj_filt2 <- subset_taxa(Ps_obj_filt, !Phylum %in% filterPhyla)
Taxonomy.bs %<>% 
  filter(Taxonomy.bs$Name %in% row.names(Ps_obj_filt2@tax_table))
sample_data(Ps_obj_filt2)$Lib.size <- rowSums(otu_table(Ps_obj_filt2))
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15568 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 26 sample variables ]
## tax_table()   Taxonomy Table:    [ 15568 taxa by 6 taxonomic ranks ]
print(Ps_obj_filt2)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15551 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 26 sample variables ]
## tax_table()   Taxonomy Table:    [ 15551 taxa by 6 taxonomic ranks ]

Plot general prevalence features of the phyla

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt2, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt2), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt2, "Order"))

# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot

prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)

ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt2), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Plot general prevalence features of the phyla excluding soil samples

Ps_obj_filt_Teas <- subset_samples(Ps_obj_filt2, Type != "Soil")
prevdf_teas <- apply(X = otu_table(Ps_obj_filt_Teas),
                 MARGIN = ifelse(taxa_are_rows(Ps_obj_filt_Teas), yes = 1, no = 2),
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf_teas <- data.frame(Prevalence = prevdf_teas,
                      TotalAbundance = taxa_sums(Ps_obj_filt_Teas),
                      tax_table(Ps_obj_filt_Teas))

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf_teas, Phylum %in% get_taxa_unique(Ps_obj_filt_Teas, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt_Teas), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders (teabag samples only)

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf_teas, Order %in% get_taxa_unique(Ps_obj_filt_Teas, "Order"))

# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot

prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)

ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt_Teas), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

#### Unsupervised filtering by prevalence I’ll remove all sequences which appear in less than 5% of the samples

# Define prevalence threshold as 5% of total samples
prevalenceThreshold <- 0.05 * nsamples(Ps_obj_filt)
prevalenceThreshold
## [1] 6.2
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
  row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt3 <- prune_taxa(keepTaxa, Ps_obj_filt2)
Taxonomy.bs %<>% 
  filter(Taxonomy.bs$Name %in% row.names(Ps_obj_filt3@tax_table))
sample_data(Ps_obj_filt3)$Lib.size <- rowSums(otu_table(Ps_obj_filt3))
print(Ps_obj_filt2)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15551 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 26 sample variables ]
## tax_table()   Taxonomy Table:    [ 15551 taxa by 6 taxonomic ranks ]
print(Ps_obj_filt3)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4217 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 26 sample variables ]
## tax_table()   Taxonomy Table:    [ 4217 taxa by 6 taxonomic ranks ]

This removed 11334 or 73% of the ESVs!.

However all these removed ESVs accounted for only:

prevdf_phylum_filt %>% 
  arrange(., Prevalence) %>%  
  group_by(Prevalence > prevalenceThreshold) %>% 
  summarise(Abundance = sum(TotalAbundance)) %>%
  mutate(`Rel. Ab.` = percent(Abundance / sum(Abundance))) %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Prevalence > prevalenceThreshold Abundance Rel. Ab.
FALSE 99783 2%
TRUE 4207003 98%

So it’s fine to remove them.

Let’s make a fasta file after filtering the sequences:

Seq_file <- "DADA2.Seqs.fa"
readDNAStringSet(paste0(data_path, Seq_file)) %>% 
  .[taxa_names(Ps_obj_filt3)] %>%  
  writeXStringSet(., filepath = paste0(data_path, str_remove(Seq_file, ".fa*"), "_filtered.fa"), format = "fasta", width = 1000)

Plot general prevalence features of the phyla after filtering

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt3, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt3), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt3, "Order"))

# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot

prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)

ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt3), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Plot general prevalence features of the phyla excluding soil samples

# Subset to the remaining phyla
prevdf_phylum_filt_teas <- subset(prevdf_teas, Phylum %in%  get_taxa_unique(Ps_obj_filt_Teas, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt_Teas), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf_teas, Order %in% get_taxa_unique(Ps_obj_filt_Teas, "Order"))

# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot

prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)

ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt_Teas), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

General taxonomic features

# remove samples with reads < min_lib_size
Ps_obj_filt3 %>%
  subset_samples(., sample_sums(Ps_obj_filt3) > min_lib_size) %>%
  filter_taxa(., function(x)
    sum(x) > 0, TRUE) ->
  # transform_sample_counts(., function(x)
    # x / sum(x) * 100) ->
  Ps_obj_filt3_subset

Ps_obj_filt3_subset %>% 
  get_variable() %>% 
  mutate_at(., "Sample.type", 
            ~fct_relevel(., levels = c("Soil", "Green tea", "Rooibos"))) %>% 
    mutate_at(., "Season", 
            ~fct_relevel(., levels = c("Winter", "Spring", "Summer", "Autumn"))) %>% 
  arrange(Field, Sample.type, Season, Replicate) %>% 
  pull(Description) %>% 
  as.character() ->
  Sample.order

Ps_obj_filt3_subset_ra <- transform_sample_counts(Ps_obj_filt3_subset, function(x){x / sum(x)} * 100)

# grabthe top 100 most abundant OTUs
Ps_obj_filt3_subset_100 <-
  prune_taxa(names(sort(taxa_sums(Ps_obj_filt3_subset_ra), TRUE)[1:100]), Ps_obj_filt3_subset_ra)
plot_heatmap(
  Ps_obj_filt3_subset_100,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  taxa.label = "Order",
  sample.order = Sample.order,
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Phylum == "Proteobacteria") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_proteo
plot_heatmap(
  Ps_obj_filt3_subset_proteo,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Phylum == "Actinobacteria") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_actino
plot_heatmap(
  Ps_obj_filt3_subset_actino,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Phylum == "Bacteroidetes") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_bacter
plot_heatmap(
  Ps_obj_filt3_subset_bacter,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
) 

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Phylum == "Acidobacteria") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_acido

plot_heatmap(
  Ps_obj_filt3_subset_acido,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Phylum",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Order == "Chitinophagales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_chitino
plot_heatmap(
  Ps_obj_filt3_subset_chitino,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Order == "Rhizobiales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_rhizo
plot_heatmap(
  Ps_obj_filt3_subset_rhizo,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Order == "Betaproteobacteriales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_beta
plot_heatmap(
  Ps_obj_filt3_subset_beta,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_filt3_subset_ra %>% 
  subset_taxa(., Order == "Myxococcales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_filt3_subset_myxo
plot_heatmap(
  Ps_obj_filt3_subset_myxo,
  method = NULL,
  distance = NULL,
  sample.label = "Description",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Let’s look at the agglomerated taxa

Ps_obj_filt3_subset_ra %>% 
  tax_glom(., "Phylum", NArm = TRUE) ->
  Ps_obj_filt_glom

plot_heatmap(
  Ps_obj_filt_glom,
  # method = "NMDS",
  # distance = "bray",
  sample.order = Sample.order,
  sample.label = "Description",
  taxa.label = "Phylum",
  taxa.order = "Phylum",
  low = "#000033",
  high = "#FF3300"
) + theme_bw(base_size = 20) + theme(axis.text.x = element_text(hjust = 1.0, angle = 90.0))

Explore abundance distribution of specific taxa

PlotAbundance(Ps_obj_filt3_subset_ra, phylum2plot = "Bacteroidetes")

# plotBefore <- PlotAbundance(Ps_obj_filt3_subset, taxa = "Proteobacteria")
# plotAfter <- PlotAbundance(Ps_obj_filt3_subset_ra, taxa = "Proteobacteria")
# Combine each plot into one graphic.
# grid.arrange(nrow = 2, plotBefore, plotAfter)
Ps_obj_filt3_subset_ra_taxa <- subset_taxa(Ps_obj_filt3_subset_ra, Order == "Flavobacteriales")
PlotAbundance(Ps_obj_filt3_subset_ra_taxa, phylum2plot = "Bacteroidetes", Facet = "Genus")

Taxa violin plots

Ps_obj_filt3_subset_df <- psmelt(Ps_obj_filt3_subset)

# Ps_obj_filt3_subset_df %>% 
#   filter(Species == "Epibolus pulchripes") ->
#   Ps_obj_filt3_subset_df_EP 
# PlotViolin(Ps_obj_filt3_subset_df_EP)

PlotViolin(Ps_obj_filt3_subset_df)

Taxa box-plot

Ps_obj_filt3_glom <- tax_glom(Ps_obj_filt3_subset_ra, 
                             "Phylum", 
                             NArm = TRUE)
Ps_obj_filt3_glom_DF <- psmelt(Ps_obj_filt3_glom)
Ps_obj_filt3_glom_DF$Phylum %<>% as.character()

# group dataframe by Phylum, calculate median rel. abundance
Ps_obj_filt3_glom_DF %>%
  group_by(Phylum) %>%
  summarise(median = median(Abundance)) ->
  medians

# find Phyla whose rel. abund. is less than 0.5%
Rare_phyla <- medians[medians$median <= 0.005, ]$Phylum

# change their name to "Rare"
Ps_obj_filt3_glom_DF[Ps_obj_filt3_glom_DF$Phylum %in% Rare_phyla, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt3_glom_DF %>%
  group_by(Sample, Sample.name, Phylum, Sample.type, Field) %>%
  summarise(Abundance = sum(Abundance)) ->
  Ps_obj_filt3_glom_DF_2plot

# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)

Ps_obj_filt3_glom_DF_2plot %>% 
  group_by(Sample) %>% 
  filter(Phylum == "Rare") %>% 
  summarise(`Rares (%)` = sum(Abundance * 100)) -> 
  Rares
# Percentage of reads classified as rare 
Rares %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Percentage of reads per sample classified as rare:
Sample Rares (%)
Green\_tea\_Cambisol\_Autumn\_1 1.91
Green\_tea\_Cambisol\_Autumn\_2 1.76
Green\_tea\_Cambisol\_Autumn\_3 1.49
Green\_tea\_Cambisol\_Autumn\_4 2.26
Green\_tea\_Cambisol\_Spring\_1 22.50
Green\_tea\_Cambisol\_Spring\_2 0.91
Green\_tea\_Cambisol\_Spring\_3 23.93
Green\_tea\_Cambisol\_Spring\_4 6.89
Green\_tea\_Cambisol\_Summer\_1 10.15
Green\_tea\_Cambisol\_Summer\_2 10.32
Green\_tea\_Cambisol\_Summer\_3 22.58
Green\_tea\_Cambisol\_Summer\_4 5.29
Green\_tea\_Cambisol\_Winter\_1 12.78
Green\_tea\_Cambisol\_Winter\_2 6.25
Green\_tea\_Cambisol\_Winter\_3 6.32
Green\_tea\_Cambisol\_Winter\_4 8.15
Green\_tea\_Fluvisol\_Autumn\_1 2.95
Green\_tea\_Fluvisol\_Autumn\_2 1.10
Green\_tea\_Fluvisol\_Autumn\_3 4.85
Green\_tea\_Fluvisol\_Autumn\_4 0.99
Green\_tea\_Fluvisol\_Spring\_1 2.42
Green\_tea\_Fluvisol\_Spring\_2 7.07
Green\_tea\_Fluvisol\_Spring\_3 10.12
Green\_tea\_Fluvisol\_Spring\_4 0.00
Green\_tea\_Fluvisol\_Summer\_1 21.22
Green\_tea\_Fluvisol\_Summer\_2 9.25
Green\_tea\_Fluvisol\_Summer\_3 7.97
Green\_tea\_Fluvisol\_Summer\_4 15.90
Green\_tea\_Fluvisol\_Winter\_1 2.80
Green\_tea\_Fluvisol\_Winter\_2 10.17
Green\_tea\_Fluvisol\_Winter\_3 5.18
Green\_tea\_Fluvisol\_Winter\_4 2.75
Green\_tea\_Luvisol\_Autumn\_1 0.35
Green\_tea\_Luvisol\_Autumn\_2 1.03
Green\_tea\_Luvisol\_Autumn\_3 0.70
Green\_tea\_Luvisol\_Autumn\_4 17.77
Green\_tea\_Luvisol\_Spring\_1 0.00
Green\_tea\_Luvisol\_Spring\_3 0.00
Green\_tea\_Luvisol\_Spring\_4 1.27
Green\_tea\_Luvisol\_Summer\_2 16.18
Green\_tea\_Luvisol\_Summer\_3 8.81
Green\_tea\_Luvisol\_Winter\_1 30.26
Green\_tea\_Luvisol\_Winter\_2 28.45
Green\_tea\_Luvisol\_Winter\_3 11.62
Green\_tea\_Luvisol\_Winter\_4 13.95
Green\_tea\_Unburied\_Winter\_1 73.37
Rooibos\_Cambisol\_Autumn\_1 2.68
Rooibos\_Cambisol\_Autumn\_2 0.80
Rooibos\_Cambisol\_Autumn\_3 4.50
Rooibos\_Cambisol\_Autumn\_4 0.36
Rooibos\_Cambisol\_Spring\_1 0.73
Rooibos\_Cambisol\_Spring\_2 8.71
Rooibos\_Cambisol\_Spring\_3 0.00
Rooibos\_Cambisol\_Spring\_4 0.79
Rooibos\_Cambisol\_Summer\_1 69.14
Rooibos\_Cambisol\_Summer\_2 83.05
Rooibos\_Cambisol\_Summer\_3 10.11
Rooibos\_Cambisol\_Summer\_4 32.78
Rooibos\_Cambisol\_Winter\_1 4.12
Rooibos\_Cambisol\_Winter\_2 10.99
Rooibos\_Cambisol\_Winter\_3 10.41
Rooibos\_Cambisol\_Winter\_4 18.00
Rooibos\_Fluvisol\_Autumn\_1 2.27
Rooibos\_Fluvisol\_Autumn\_2 1.86
Rooibos\_Fluvisol\_Autumn\_3 6.46
Rooibos\_Fluvisol\_Autumn\_4 8.59
Rooibos\_Fluvisol\_Spring\_1 0.93
Rooibos\_Fluvisol\_Spring\_2 14.71
Rooibos\_Fluvisol\_Spring\_3 1.62
Rooibos\_Fluvisol\_Spring\_4 0.00
Rooibos\_Fluvisol\_Summer\_1 27.02
Rooibos\_Fluvisol\_Summer\_2 24.94
Rooibos\_Fluvisol\_Summer\_3 18.05
Rooibos\_Fluvisol\_Summer\_4 19.01
Rooibos\_Fluvisol\_Winter\_1 9.39
Rooibos\_Fluvisol\_Winter\_2 31.21
Rooibos\_Fluvisol\_Winter\_3 10.02
Rooibos\_Fluvisol\_Winter\_4 18.90
Rooibos\_Luvisol\_Autumn\_1 0.00
Rooibos\_Luvisol\_Autumn\_2 28.30
Rooibos\_Luvisol\_Autumn\_3 6.80
Rooibos\_Luvisol\_Autumn\_4 1.80
Rooibos\_Luvisol\_Spring\_1 5.13
Rooibos\_Luvisol\_Spring\_2 11.37
Rooibos\_Luvisol\_Spring\_3 1.21
Rooibos\_Luvisol\_Spring\_4 9.93
Rooibos\_Luvisol\_Summer\_1 4.94
Rooibos\_Luvisol\_Summer\_2 13.41
Rooibos\_Luvisol\_Summer\_4 7.08
Rooibos\_Luvisol\_Winter\_1 7.42
Rooibos\_Luvisol\_Winter\_2 18.50
Rooibos\_Luvisol\_Winter\_3 4.64
Rooibos\_Luvisol\_Winter\_4 10.43
Rooibos\_Unburied\_Winter\_1 16.60
Soil\_Cambisol\_Autumn\_1 201.50
Soil\_Cambisol\_Autumn\_2 141.37
Soil\_Cambisol\_Autumn\_3 148.25
Soil\_Cambisol\_Spring\_1 84.01
Soil\_Cambisol\_Spring\_2 97.74
Soil\_Cambisol\_Summer\_1 161.58
Soil\_Cambisol\_Summer\_2 128.35
Soil\_Cambisol\_Summer\_3 138.61
Soil\_Cambisol\_Winter\_1 104.15
Soil\_Cambisol\_Winter\_2 102.26
Soil\_Fluvisol\_Autumn\_2 216.91
Soil\_Fluvisol\_Autumn\_3 245.48
Soil\_Fluvisol\_Spring\_1 185.35
Soil\_Fluvisol\_Spring\_2 239.95
Soil\_Fluvisol\_Summer\_1 274.82
Soil\_Fluvisol\_Summer\_2 227.69
Soil\_Fluvisol\_Summer\_3 261.46
Soil\_Fluvisol\_Winter\_1 266.89
Soil\_Fluvisol\_Winter\_2 166.05
Soil\_Luvisol\_Autumn\_1 208.92
Soil\_Luvisol\_Autumn\_2 184.14
Soil\_Luvisol\_Autumn\_3 395.29
Soil\_Luvisol\_Spring\_1 190.94
Soil\_Luvisol\_Spring\_2 156.35
Soil\_Luvisol\_Summer\_1 221.86
Soil\_Luvisol\_Summer\_2 182.94
Soil\_Luvisol\_Summer\_3 216.44
Soil\_Luvisol\_Winter\_1 102.42
Soil\_Luvisol\_Winter\_2 225.64
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt3_glom)))
Rares %<>% arrange(., sample_order)

Rares %>% 
  cbind(., sample_data(Ps_obj_filt3_glom)) %>% 
  group_by(Sample.type) %>% 
  summarise(`Rares (%)` = mean(`Rares (%)`)) -> 
  Rares_merged

# Percentage of reads classified as rare 
Rares_merged %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Percentage of reads per sample classified as rare:
Sample.type Rares (%)
Green tea 9.83
Rooibos 12.49
Soil 188.88
Ps_obj_filt3_glom_DF_2plot %>% 
  group_by(Phylum) %>% 
  summarise(sum.Taxa = sum(Abundance)) %>% 
  arrange(desc(sum.Taxa)) -> Taxa.rank
Ps_obj_filt3_glom_DF_2plot$Phylum %<>% 
  factor(., levels = Taxa.rank$Phylum) %>% 
  fct_relevel(., "Rare", after = Inf)
  
p_taxa_box <-
  ggplot(Ps_obj_filt3_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
  geom_boxplot(aes(group = interaction(Phylum, Sample.type)), position = position_dodge(width = 0.9), fatten = 1) +
  geom_point(
    aes(colour = Sample.type),
    position = position_jitterdodge(dodge.width = 1),
    alpha = 1 / 2,
    stroke = 0,
    size = 2
  ) +
  # scale_colour_manual(values = pom4, name = "") +
  theme_cowplot(font_size = 11, font_family = f_name) +
  labs(x = NULL, y = "Relative abundance (%)") +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  facet_grid(Field ~ .) +
  background_grid(major = "xy",
                  minor = "none") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 0.9
  ))
print(p_taxa_box)

Make Krona plots

dir.create(paste0(data_path, "Krona/"))
for (i in seq(nsamples(Ps_obj_filt3_subset))) {
  sample_data <-
    data.frame(Abundance = get_taxa(Ps_obj_filt3_subset, i), as(tax_table(Ps_obj_filt3_subset), "matrix"))
  sample_data <- sample_data[sample_data[, 1] > 0, ]
  write_tsv(sample_data, paste0(data_path, "Krona/", sample_names(Ps_obj_filt3_subset)[i], ".tsv"))
}

sample_data(Ps_obj_filt3_subset)$Sample.type %<>% fct_recode(., Green_tea = "Green tea")

sample_data(Ps_obj_filt3_subset)$Sample.type_field_season <- paste0(
  get_variable(Ps_obj_filt3_subset, "Sample.type"),
  "_",
  get_variable(Ps_obj_filt3_subset, "Field"),
  "_",
  get_variable(Ps_obj_filt3_subset, "Season")
)

Ps_obj_filt3_subset %>%
  phyloseq::merge_samples(., "Sample.type_field_season", fun = mean) ->
  merged.Ps_obj

for (i in seq(nsamples(merged.Ps_obj))) {
  sample_data <-
    data.frame(Abundance = get_taxa(merged.Ps_obj, i), as(tax_table(merged.Ps_obj), "matrix"))
  sample_data <- sample_data[sample_data[, 1] > 0, ]
  write_tsv(sample_data, paste0(data_path, "Krona/", sample_names(merged.Ps_obj)[i], ".tsv"))
}

list.files(paste0(data_path, "Krona/"), full.names = TRUE) %>%
  paste(., collapse = " ") %>%
  paste0("/usr/local/bin/ktImportText ", .,
         " -o ",
         data_path,
         "Krona/TeaTime_Krona.html") %>%
  system()

Save filtered phyloseq object

saveRDS(Ps_obj_filt, file = paste0(data_path, Proj_name, "16S_filt.RDS"))
saveRDS(Ps_obj_filt3, file = paste0(data_path, Proj_name, "16S_filt3.RDS"))
sessioninfo::session_info() %>%
  details::details(
    summary = 'Current session info',
    open    = TRUE
 )
Current session info
─ Session info ─────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Ubuntu 18.04.5 LTS          
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       Europe/Prague               
 date     2021-05-21                  

─ Packages ─────────────────────────────────────────────────────────────────────────────
 package      * version date       lib source        
 ade4           1.7-16  2020-10-28 [1] CRAN (R 4.0.2)
 ape            5.5     2021-04-25 [1] CRAN (R 4.0.3)
 assertthat     0.2.1   2019-03-21 [1] CRAN (R 4.0.2)
 backports      1.2.1   2020-12-09 [1] CRAN (R 4.0.2)
 Biobase        2.50.0  2020-10-27 [1] Bioconductor  
 BiocGenerics * 0.36.1  2021-04-16 [1] Bioconductor  
 biomformat     1.18.0  2020-10-27 [1] Bioconductor  
 Biostrings   * 2.58.0  2020-10-27 [1] Bioconductor  
 broom        * 0.7.6   2021-04-05 [1] CRAN (R 4.0.3)
 cellranger     1.1.0   2016-07-27 [1] CRAN (R 4.0.2)
 cli            2.5.0   2021-04-26 [1] CRAN (R 4.0.3)
 clipr          0.7.1   2020-10-08 [1] CRAN (R 4.0.2)
 cluster        2.1.2   2021-04-17 [1] CRAN (R 4.0.3)
 codetools      0.2-18  2020-11-04 [1] CRAN (R 4.0.2)
 colorspace     2.0-1   2021-05-04 [1] CRAN (R 4.0.3)
 cowplot      * 1.1.1   2020-12-30 [1] CRAN (R 4.0.2)
 crayon         1.4.1   2021-02-08 [1] CRAN (R 4.0.3)
 data.table     1.14.0  2021-02-21 [1] CRAN (R 4.0.3)
 DBI            1.1.1   2021-01-15 [1] CRAN (R 4.0.3)
 dbplyr         2.1.1   2021-04-06 [1] CRAN (R 4.0.3)
 desc           1.3.0   2021-03-05 [1] CRAN (R 4.0.3)
 details        0.2.1   2020-01-12 [1] CRAN (R 4.0.2)
 digest         0.6.27  2020-10-24 [1] CRAN (R 4.0.2)
 doParallel   * 1.0.16  2020-10-16 [1] CRAN (R 4.0.2)
 dplyr        * 1.0.6   2021-05-05 [1] CRAN (R 4.0.3)
 ellipsis       0.3.2   2021-04-29 [1] CRAN (R 4.0.3)
 evaluate       0.14    2019-05-28 [1] CRAN (R 4.0.2)
 extrafont    * 0.17    2014-12-08 [1] CRAN (R 4.0.2)
 extrafontdb    1.0     2012-06-11 [1] CRAN (R 4.0.2)
 fansi          0.4.2   2021-01-15 [1] CRAN (R 4.0.3)
 farver         2.1.0   2021-02-28 [1] CRAN (R 4.0.3)
 forcats      * 0.5.1   2021-01-27 [1] CRAN (R 4.0.3)
 foreach      * 1.5.1   2020-10-15 [1] CRAN (R 4.0.2)
 fs             1.5.0   2020-07-31 [1] CRAN (R 4.0.2)
 generics       0.1.0   2020-10-31 [1] CRAN (R 4.0.2)
 ggplot2      * 3.3.3   2020-12-30 [1] CRAN (R 4.0.2)
 glue           1.4.2   2020-08-27 [1] CRAN (R 4.0.2)
 gridExtra    * 2.3     2017-09-09 [1] CRAN (R 4.0.2)
 gtable         0.3.0   2019-03-25 [1] CRAN (R 4.0.2)
 haven          2.4.1   2021-04-23 [1] CRAN (R 4.0.3)
 highr          0.9     2021-04-16 [1] CRAN (R 4.0.3)
 hms            1.1.0   2021-05-17 [1] CRAN (R 4.0.3)
 htmltools      0.5.1.1 2021-01-22 [1] CRAN (R 4.0.3)
 httr           1.4.2   2020-07-20 [1] CRAN (R 4.0.2)
 igraph         1.2.6   2020-10-06 [1] CRAN (R 4.0.2)
 IRanges      * 2.24.1  2020-12-12 [1] Bioconductor  
 iterators    * 1.0.13  2020-10-15 [1] CRAN (R 4.0.2)
 jsonlite       1.7.2   2020-12-09 [1] CRAN (R 4.0.2)
 kableExtra   * 1.3.4   2021-02-20 [1] CRAN (R 4.0.3)
 knitr        * 1.33    2021-04-24 [1] CRAN (R 4.0.3)
 labeling       0.4.2   2020-10-20 [1] CRAN (R 4.0.2)
 lattice      * 0.20-44 2021-05-02 [1] CRAN (R 4.0.3)
 lifecycle      1.0.0   2021-02-15 [1] CRAN (R 4.0.3)
 lubridate      1.7.10  2021-02-26 [1] CRAN (R 4.0.3)
 magrittr     * 2.0.1   2020-11-17 [1] CRAN (R 4.0.2)
 MASS           7.3-54  2021-05-03 [1] CRAN (R 4.0.3)
 Matrix         1.3-3   2021-05-04 [1] CRAN (R 4.0.3)
 mgcv           1.8-35  2021-04-18 [1] CRAN (R 4.0.3)
 modelr         0.1.8   2020-05-19 [1] CRAN (R 4.0.2)
 multtest       2.46.0  2020-10-27 [1] Bioconductor  
 munsell        0.5.0   2018-06-12 [1] CRAN (R 4.0.2)
 my.tools     * 0.5     2020-09-30 [1] local         
 nlme           3.1-152 2021-02-04 [1] CRAN (R 4.0.3)
 permute      * 0.9-5   2019-03-12 [1] CRAN (R 4.0.2)
 phyloseq     * 1.34.0  2020-10-27 [1] Bioconductor  
 pillar         1.6.1   2021-05-16 [1] CRAN (R 4.0.3)
 pkgconfig      2.0.3   2019-09-22 [1] CRAN (R 4.0.2)
 plyr           1.8.6   2020-03-03 [1] CRAN (R 4.0.2)
 png            0.1-7   2013-12-03 [1] CRAN (R 4.0.2)
 prettyunits    1.1.1   2020-01-24 [1] CRAN (R 4.0.2)
 progress       1.2.2   2019-05-16 [1] CRAN (R 4.0.2)
 purrr        * 0.3.4   2020-04-17 [1] CRAN (R 4.0.2)
 R6             2.5.0   2020-10-28 [1] CRAN (R 4.0.2)
 ragg         * 1.1.2   2021-03-17 [1] CRAN (R 4.0.3)
 Rcpp           1.0.6   2021-01-15 [1] CRAN (R 4.0.3)
 readr        * 1.4.0   2020-10-05 [1] CRAN (R 4.0.2)
 readxl         1.3.1   2019-03-13 [1] CRAN (R 4.0.2)
 reprex         2.0.0   2021-04-02 [1] CRAN (R 4.0.3)
 reshape2       1.4.4   2020-04-09 [1] CRAN (R 4.0.2)
 rhdf5          2.34.0  2020-10-27 [1] Bioconductor  
 rhdf5filters   1.2.1   2021-05-03 [1] Bioconductor  
 Rhdf5lib       1.12.1  2021-01-26 [1] Bioconductor  
 rlang          0.4.11  2021-04-30 [1] CRAN (R 4.0.3)
 rmarkdown    * 2.8     2021-05-07 [1] CRAN (R 4.0.3)
 rprojroot      2.0.2   2020-11-15 [1] CRAN (R 4.0.2)
 rstudioapi     0.13    2020-11-12 [1] CRAN (R 4.0.2)
 Rttf2pt1       1.3.8   2020-01-10 [1] CRAN (R 4.0.2)
 rvest          1.0.0   2021-03-09 [1] CRAN (R 4.0.3)
 S4Vectors    * 0.28.1  2020-12-09 [1] Bioconductor  
 scales       * 1.1.1   2020-05-11 [1] CRAN (R 4.0.2)
 sessioninfo    1.1.1   2018-11-05 [1] CRAN (R 4.0.2)
 stringi        1.6.2   2021-05-17 [1] CRAN (R 4.0.3)
 stringr      * 1.4.0   2019-02-10 [1] CRAN (R 4.0.2)
 survival       3.2-11  2021-04-26 [1] CRAN (R 4.0.3)
 svglite      * 2.0.0   2021-02-20 [1] CRAN (R 4.0.3)
 systemfonts    1.0.2   2021-05-11 [1] CRAN (R 4.0.3)
 textshaping    0.3.4   2021-05-11 [1] CRAN (R 4.0.3)
 tibble       * 3.1.2   2021-05-16 [1] CRAN (R 4.0.3)
 tidyr        * 1.1.3   2021-03-03 [1] CRAN (R 4.0.3)
 tidyselect     1.1.1   2021-04-30 [1] CRAN (R 4.0.3)
 tidyverse    * 1.3.1   2021-04-15 [1] CRAN (R 4.0.3)
 utf8           1.2.1   2021-03-12 [1] CRAN (R 4.0.3)
 vctrs          0.3.8   2021-04-29 [1] CRAN (R 4.0.3)
 vegan        * 2.5-7   2020-11-28 [1] CRAN (R 4.0.3)
 viridisLite    0.4.0   2021-04-13 [1] CRAN (R 4.0.3)
 webshot        0.5.2   2019-11-22 [1] CRAN (R 4.0.2)
 withr          2.4.2   2021-04-18 [1] CRAN (R 4.0.3)
 xfun           0.23    2021-05-15 [1] CRAN (R 4.0.3)
 xml2           1.3.2   2020-04-23 [1] CRAN (R 4.0.2)
 XVector      * 0.30.0  2020-10-27 [1] Bioconductor  
 yaml           2.2.1   2020-02-01 [1] CRAN (R 4.0.2)
 zlibbioc       1.36.0  2020-10-27 [1] Bioconductor  

[1] /home/angel/R/library
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library

References

Callahan BJ, Sankaran K, Fukuyama JA et al. Bioconductor workflow for microbiome data analysis: From raw reads to community analyses. F1000Research 2016;5:1492.